gorgonia.org/gorgonia@v0.9.17/cuda modules/src/elemunaryop.cu

gorgonia.org/gorgonia@v0.9.17/cuda modules/src/elemunaryop.cu (about)

     1  #define _USE_MATH_DEFINES
     2  #include <math.h>
     3  
     4  #define THREADID \
     5  	int blockId = blockIdx.x + blockIdx.y * gridDim.x + gridDim.x * gridDim.y * blockIdx.z;\
     6  	int idx = blockId * (blockDim.x * blockDim.y * blockDim.z) + (threadIdx.z * (blockDim.x * blockDim.y)) + (threadIdx.y * blockDim.x) + threadIdx.x;
     7  
     8  #define CHECKSIZE \
     9  	if (idx >= size) { \
    10  		return; \
    11  	}
    12  
    13  #define UNARYOP(name, t, type, op)\
    14  	__global__ void name ##_##t (type* A, int size) { \
    15  		THREADID \
    16  		CHECKSIZE \
    17  		A[idx] = op(A[idx]); \
    18  	}
    19  
    20  
    21  extern "C" { UNARYOP(cos, f64, double, cos) }
    22  extern "C" { UNARYOP(sin, f64, double, sin) }
    23  extern "C" { UNARYOP(exp, f64, double, exp) }
    24  extern "C" { UNARYOP(ln, f64, double, log) }
    25  extern "C" { UNARYOP(log2, f64, double, log2) }
    26  extern "C" { UNARYOP(sqrt, f64, double, sqrt) }
    27  extern "C" { UNARYOP(tanh, f64, double, tanh) }
    28  extern "C" { UNARYOP(log1p, f64, double, log1p) }
    29  extern "C" { UNARYOP(expm1, f64, double, expm1) }
    30  
    31  // un-differentiable
    32  extern "C" { UNARYOP(abs, f64, double, abs) }
    33  extern "C" { UNARYOP(ceil, f64, double, ceil) }
    34  extern "C" { UNARYOP(floor, f64, double, floor) }
    35  
    36  
    37  extern "C" {
    38  	__global__ void sign_f64(double* A, int size) {
    39  		THREADID
    40  		CHECKSIZE
    41  		A[idx] = (A[idx] > 0.0) - (A[idx] < 0.0);
    42  	}
    43  }
    44  
    45  extern "C" {
    46  	__global__ void square_f64(double* A, int size) {
    47  		THREADID
    48  		CHECKSIZE
    49  		A[idx] = A[idx] * A[idx];
    50  	}
    51  }
    52  
    53  extern "C" {
    54  	__global__ void cube_f64(double* A, int size) {
    55  		THREADID
    56  		CHECKSIZE
    57  		A[idx] = A[idx] * A[idx] * A[idx];
    58  	}
    59  }
    60  
    61  extern "C" {
    62  	__global__ void neg_f64(double* A, int size) {
    63  		THREADID
    64  		CHECKSIZE
    65  		A[idx] = -A[idx];
    66  	}
    67  }
    68  
    69  extern "C" {
    70  	__global__ void inverse_f64(double* A, int size) {
    71  		THREADID
    72  		CHECKSIZE
    73  		A[idx] = 1.0/A[idx];
    74  	}	
    75  }
    76  
    77  extern "C" {
    78  	__global__ void softplus_f64(double* A, int size) {
    79  		THREADID
    80  		CHECKSIZE
    81  		if (A[idx] < -708.0) {
    82  			A[idx] = 0.0;
    83  		} else if (A[idx] > 16.0) {
    84  			// no op
    85  		} else {
    86  			A[idx] = log1p(exp(A[idx]));
    87  		}
    88  	}
    89  }
    90  
    91  extern "C" {
    92  	__global__ void sigmoid_f64(double* A, int size) {
    93  		THREADID
    94  		CHECKSIZE
    95  		if (A[idx] < -709.0) {
    96  			A[idx] = 0.0;
    97  		} else if (A[idx] > 19.0) {
    98  			A[idx] = 1.0;
    99  		} else {
   100  			A[idx] = 1.0 / (1.0 + exp(-A[idx]));
   101  		}
   102  		// alternative sigmoid function:
   103  		// A[idx] = 1 / (1 + pow(M_E, (double)(-1 * A[idx])));
   104  	}	
   105  }
   106  
   107  /* FLOAT32 */
   108  
   109  extern "C" { UNARYOP(cos, f32, float, cosf) }
   110  extern "C" { UNARYOP(sin, f32, float, sinf) }
   111  extern "C" { UNARYOP(exp, f32, float, expf) }
   112  extern "C" { UNARYOP(ln, f32, float, logf) }
   113  extern "C" { UNARYOP(log2, f32, float, log2f) }
   114  extern "C" { UNARYOP(sqrt, f32, float, sqrtf) }
   115  extern "C" { UNARYOP(tanh, f32, float, tanhf) }
   116  extern "C" { UNARYOP(log1p, f32, float, log1pf) }
   117  extern "C" { UNARYOP(expm1, f32, float, expm1f) }
   118  
   119  // un-differentiable
   120  extern "C" { UNARYOP(abs, f32, float, abs) }
   121  extern "C" { UNARYOP(ceil, f32, float, ceilf) }
   122  extern "C" { UNARYOP(floor, f32, float, floorf) }
   123  
   124  
   125  extern "C" {
   126  	__global__ void sign_f32(float* A, int size) {
   127  		THREADID
   128  		CHECKSIZE
   129  		A[idx] = (A[idx] > 0.0f) - (A[idx] < 0.0f);
   130  	}
   131  }
   132  
   133  extern "C" {
   134  	__global__ void square_f32(float* A, int size) {
   135  		THREADID
   136  		CHECKSIZE
   137  		A[idx] = A[idx] * A[idx];
   138  	}
   139  }
   140  
   141  extern "C" {
   142  	__global__ void cube_f32(float* A, int size) {
   143  		THREADID
   144  		CHECKSIZE
   145  		A[idx] = A[idx] * A[idx] * A[idx];
   146  	}
   147  }
   148  
   149  extern "C" {
   150  	__global__ void neg_f32(float* A, int size) {
   151  		THREADID
   152  		CHECKSIZE
   153  		A[idx] = -A[idx];
   154  	}
   155  }
   156  
   157  extern "C" {
   158  	__global__ void inverse_f32(float* A, int size) {
   159  		THREADID
   160  		CHECKSIZE
   161  		A[idx] = 1.0f/A[idx];
   162  	}	
   163  }
   164  
   165  extern "C" {
   166  	__global__ void softplus_f32(float* A, int size) {
   167  		THREADID
   168  		CHECKSIZE
   169  		if (A[idx] < -103.0f) {
   170  			A[idx] = 0.0f;
   171  		} else if (A[idx] > 14.0f) {
   172  			// no op
   173  		} else {
   174  			A[idx] = log1pf(expf(A[idx]));
   175  		}
   176  	}
   177  }
   178  
   179  extern "C" {
   180  	__global__ void sigmoid_f32(float* A, int size) {
   181  		THREADID
   182  		CHECKSIZE
   183  		if (A[idx] < -88.0f) {
   184  			A[idx] = 0.0f;
   185  		} else if (A[idx] > 15.0f) {
   186  			A[idx] = 1.0f;
   187  		} else {
   188  			A[idx] = 1.0f / (1.0f + expf(-A[idx]));
   189  		}
   190  		// alternative sigmoid function:
   191  		// A[idx] = 1 / (1 + powf((float)(M_E), (-1 * A[idx])));
   192  	}	
   193  }